---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
      name:  <unnamed>
       log:  /Users/jesse/Dropbox/voter_id/Replication/table_a7.log
  log type:  text
 opened on:  17 Aug 2020, 15:17:34

. 
. use "$path/nc_dataset.dta" if new_reg == 0, clear

. 
. // drop 2018 
. drop if inlist(election, 11, 12)
(13,254,638 observations deleted)

. 
. drop new_reg noid_p_2016 dem rep

. 
. sort id election

. 
. replace voted = 0 if voted == .
(0 real changes made)

. 
. // drop anyone born after 1998; set voted to missing (.) if under 18 / ineligible in that election year
. drop if birth_year > 1998 & birth_year < 9999
(70 observations deleted)

. replace voted = . if (birth_year > 1990 & birth_year < 9999) & (election == 1 | election == 2) // 2008
(1,102,090 real changes made, 1,102,090 to missing)

. replace voted = . if (birth_year > 1992 & birth_year < 9999) & (election == 3 | election == 4) // 2010
(680,176 real changes made, 680,176 to missing)

. replace voted = . if (birth_year > 1994 & birth_year < 9999) & (election == 5 | election == 6) // 2012
(269,954 real changes made, 269,954 to missing)

. replace voted = . if (birth_year > 1996 & birth_year < 9999) & (election == 7 | election == 8) // 2014
(314 real changes made, 314 to missing)

. 
. ** create string encoding of possible pre-treatment outcome paths
. tostring voted, generate(outcome_path)
outcome_path generated as str1

. 
. by id: gen outcome_path_pretreat = outcome_path[1] + outcome_path[2] + ///
>                                                                 outcome_path[3] + outcome_path[4] + ///
>                                                                 outcome_path[5] + outcome_path[6] + ///
>                                                                 outcome_path[7] + outcome_path[8]

. 
. gen count = 1

. 
. // keep primaries only
. keep if mod(election,2) == 1
(33,136,560 observations deleted)

. 
. // generate race interactions
. replace black = 0 if black == .
(687,520 real changes made)

. replace hispanic = 0 if hispanic == .
(0 real changes made)

. replace othernw = 0 if othernw == .
(687,520 real changes made)

. gen treat_black = treat * black

. gen treat_hispanic = treat * hispanic 

. gen treat_othernw = treat * othernw

. gen no_dmv_black = no_dmv_match * black 

. gen no_dmv_hispanic = no_dmv_match * hispanic

. gen no_dmv_othernw = no_dmv_match * othernw

. 
. replace voted = 0 if voted == .
(1,026,267 real changes made)

. 
. // construct age bins, give young voters who are ineligible for some election their own age bin
. gen age_bin = 11 if (birth_year > 1990 & birth_year < 9999)
(30,381,335 missing values generated)

. replace age_bin = 12 if (birth_year > 1992 & birth_year < 9999)
(1,700,440 real changes made)

. replace age_bin = 13 if (birth_year > 1994 & birth_year < 9999)
(674,885 real changes made)

. replace age_bin = 14 if (birth_year > 1996 & birth_year < 9999)
(785 real changes made)

. // give those who are eligible over the whole period their own age decile
. xtile age_decile = birth_year if (birth_year <= 1990), nq(10)

. replace age_bin = age_decile if (birth_year <= 1990)
(30,334,280 real changes made)

. drop age_decile

. 
. compress
  variable count was float now byte
  variable treat_black was float now byte
  variable treat_hispanic was float now byte
  variable treat_othernw was float now byte
  variable no_dmv_black was float now byte
  variable no_dmv_hispanic was float now byte
  variable no_dmv_othernw was float now byte
  variable age_bin was float now byte
  (795,277,440 bytes saved)

. 
. sort id election

. 
. * vanilla diff in diff
. 
. reghdfe voted treat treat_black treat_hispanic treat_othernw, a(id election) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 33,136,560
Absorbing 2 HDFE groups                           F(   4,6627311) =    3205.51
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5364
                                                  Adj R-squared   =     0.4205
                                                  Within R-sq.    =     0.0004
Number of clusters (id)      =  6,627,312         Root MSE        =     0.3269

                               (Std. Err. adjusted for 6,627,312 clusters in id)
--------------------------------------------------------------------------------
               |               Robust
         voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
---------------+----------------------------------------------------------------
         treat |  -.0714671   .0010069   -70.98   0.000    -.0734406   -.0694937
   treat_black |  -.0188227   .0014592   -12.90   0.000    -.0216826   -.0159627
treat_hispanic |   .0196965   .0029665     6.64   0.000     .0138823    .0255107
 treat_othernw |  -.0090354   .0029072    -3.11   0.002    -.0147334   -.0033374
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------------------+
   Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
---------------+-------------------------------------------------|
            id |            0         6627312        6627312 *   | 
      election |            4               5              1     | 
-----------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b1 = _b[treat]

. local se1 = _se[treat]

. local b1_2 = _b[treat_black]

. local se1_2 = _se[treat_black]

. local b1_3 = _b[treat_hispanic]

. local se1_3 = _se[treat_hispanic]

. local b1_4 = _b[treat_othernw]

. local se1_4 = _se[treat_othernw]

. local n1 = e(N)

. local nclust1 = e(N_clust)

. 
. * race by year
. reghdfe voted treat treat_black treat_hispanic treat_othernw, a(id race_by_year) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 33,136,560
Absorbing 2 HDFE groups                           F(   4,6627311) =    2580.57
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5414
                                                  Adj R-squared   =     0.4267
                                                  Within R-sq.    =     0.0003
Number of clusters (id)      =  6,627,312         Root MSE        =     0.3251

                               (Std. Err. adjusted for 6,627,312 clusters in id)
--------------------------------------------------------------------------------
               |               Robust
         voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
---------------+----------------------------------------------------------------
         treat |   -.083095   .0010215   -81.34   0.000    -.0850972   -.0810929
   treat_black |   .0234087   .0015123    15.48   0.000     .0204446    .0263727
treat_hispanic |   .0189737   .0031627     6.00   0.000     .0127749    .0251724
 treat_othernw |   .0275656   .0030204     9.13   0.000     .0216457    .0334854
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------------------+
   Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
---------------+-------------------------------------------------|
            id |            0         6627312        6627312 *   | 
  race_by_year |           24              25              1     | 
-----------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b2 = _b[treat]

. local se2 = _se[treat]

. local b2_2 = _b[treat_black]

. local se2_2 = _se[treat_black]

. local b2_3 = _b[treat_hispanic]

. local se2_3 = _se[treat_hispanic]

. local b2_4 = _b[treat_othernw]

. local se2_4 = _se[treat_othernw]

. local n2 = e(N)

. local nclust2 = e(N_clust)

. 
. * age by year 
. reghdfe voted treat treat_black treat_hispanic treat_othernw, a(id age_by_year) cluster(id)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 33,089,505
Absorbing 2 HDFE groups                           F(   4,6617900) =    1662.48
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5433
                                                  Adj R-squared   =     0.4291
                                                  Within R-sq.    =     0.0002
Number of clusters (id)      =  6,617,901         Root MSE        =     0.3245

                               (Std. Err. adjusted for 6,617,901 clusters in id)
--------------------------------------------------------------------------------
               |               Robust
         voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
---------------+----------------------------------------------------------------
         treat |  -.0403566   .0010255   -39.35   0.000    -.0423664   -.0383467
   treat_black |  -.0304476   .0014745   -20.65   0.000    -.0333376   -.0275576
treat_hispanic |  -.0139615   .0029672    -4.71   0.000     -.019777    -.008146
 treat_othernw |  -.0365581   .0028998   -12.61   0.000    -.0422416   -.0308745
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
-----------------------------------------------------------------+
   Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
---------------+-------------------------------------------------|
            id |            0         6617901        6617901 *   | 
   age_by_year |          444             445              1     | 
-----------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b3 = _b[treat]

. local se3 = _se[treat]

. local b3_2 = _b[treat_black]

. local se3_2 = _se[treat_black]

. local b3_3 = _b[treat_hispanic]

. local se3_3 = _se[treat_hispanic]

. local b3_4 = _b[treat_othernw]

. local se3_4 = _se[treat_othernw]

. local n3 = e(N)

. local nclust3 = e(N_clust)

. 
. * age by race by year
. reghdfe voted treat treat_black treat_hispanic treat_othernw, a(id race_by_age_by_year) cluster(id)
(dropped 20 singleton observations)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   = 33,089,485
Absorbing 2 HDFE groups                           F(   4,6617896) =    1101.79
Statistics robust to heteroskedasticity           Prob > F        =     0.0000
                                                  R-squared       =     0.5490
                                                  Adj R-squared   =     0.4362
                                                  Within R-sq.    =     0.0001
Number of clusters (id)      =  6,617,897         Root MSE        =     0.3225

                               (Std. Err. adjusted for 6,617,897 clusters in id)
--------------------------------------------------------------------------------
               |               Robust
         voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
---------------+----------------------------------------------------------------
         treat |  -.0499651   .0010461   -47.76   0.000    -.0520154   -.0479148
   treat_black |   .0078429   .0015419     5.09   0.000     .0048209     .010865
treat_hispanic |   -.011061    .003178    -3.48   0.001    -.0172899   -.0048322
 treat_othernw |  -.0018776   .0030418    -0.62   0.537    -.0078394    .0040841
--------------------------------------------------------------------------------

Absorbed degrees of freedom:
----------------------------------------------------------------------+
        Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
--------------------+-------------------------------------------------|
                 id |            0         6617897        6617897 *   | 
race_by_age_by_year |         2179            2180              1     | 
----------------------------------------------------------------------+
* = fixed effect nested within cluster; treated as redundant for DoF computation

. local b4 = _b[treat]

. local se4 = _se[treat]

. local b4_2 = _b[treat_black]

. local se4_2 = _se[treat_black]

. local b4_3 = _b[treat_hispanic]

. local se4_3 = _se[treat_hispanic]

. local b4_4 = _b[treat_othernw]

. local se4_4 = _se[treat_othernw]

. local n4 = e(N)

. local nclust4 = e(N_clust)

. 
. 
. 
. preserve

. 
. collapse (sum) count (mean) voted, by(outcome_path_pretreat no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw election)

. 
. *** get N by path by summing together n treated and n control
. sort outcome_path no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw election

. // get treated units in each bin
. gen tot_tmp = count if no_dmv_match & ///
>         ((no_dmv_match != no_dmv_match[_n-1]) | ///
>         (no_dmv_black != no_dmv_black[_n-1]) | ///
>         (no_dmv_hispanic != no_dmv_hispanic[_n-1]) | ///
>         (no_dmv_othernw != no_dmv_othernw[_n-1]))
(5,221 missing values generated)

. // get treated units in outcome path
. egen tot_treat = sum(tot_tmp), by(outcome_path)

. // get control units in each outcome path
. by outcome_path no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw: replace tot_tmp = count[1] if _n == 1
(341 real changes made)

. // get total units in each outcome path
. egen tot = sum(tot_tmp), by(outcome_path)

. // N_voters is the number of voters who enter into the regression
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  6100

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
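. *** assumes each stratum has 10 obs (treated/control x 5 elections); NOTE(review): cells here are also split by no_dmv_black/hispanic/othernw while op groups by outcome_path only, so a stratum can hold more than 10 rows -- confirm the /10 scaling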
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path)

. 
. 
. reghdfe voted no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 1035 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =    544,320
Absorbing 2 HDFE groups                           F(   4, 544074) =      61.44
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.6245
                                                  Adj R-squared   =     0.6243
                                                  Within R-sq.    =     0.0005
                                                  Root MSE        =     0.2005

---------------------------------------------------------------------------------
          voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
----------------+----------------------------------------------------------------
   no_dmv_match |  -.0090778   .0008573   -10.59   0.000     -.010758   -.0073976
   no_dmv_black |  -.0026902   .0008574    -3.14   0.002    -.0043707   -.0010097
no_dmv_hispanic |   .0055842   .0008626     6.47   0.000     .0038937    .0072748
 no_dmv_othernw |   .0003545   .0008581     0.41   0.680    -.0013274    .0020363
----------------+----------------------------------------------------------------
       Absorbed |    F(241, 544074) =   3753.210   0.000             (Joint test)
---------------------------------------------------------------------------------

Absorbed degrees of freedom:
------------------------------------------------------------------+
    Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
----------------+-------------------------------------------------|
             op |          238             238              0     | 
       election |            4               5              1     | 
------------------------------------------------------------------+

. local b5 = _b[no_dmv_match]

. local se5 = _se[no_dmv_match]

. local b5_2 = _b[no_dmv_black]

. local se5_2 = _se[no_dmv_black]

. local b5_3 = _b[no_dmv_hispanic]

. local se5_3 = _se[no_dmv_hispanic]

. local b5_4 = _b[no_dmv_othernw]

. local se5_4 = _se[no_dmv_othernw]

. local n5 = N

. local nclust5 = N_voters

. 
. 
. * match on pre-treatment turnout path and race
. 
. restore

. preserve

. 
. collapse (sum) count (mean) voted, by(outcome_path_pretreat no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw race_string election)

. 
. *** get N by path by summing together n treated and n control
. sort outcome_path no_dmv_match race_string no_dmv_black no_dmv_hispanic no_dmv_othernw election

. // get treated units in each bin
. gen tot_tmp = count if no_dmv_match & ///
>         ((no_dmv_match != no_dmv_match[_n-1]) | ///
>         (no_dmv_black != no_dmv_black[_n-1]) | ///
>         (no_dmv_hispanic != no_dmv_hispanic[_n-1]) | ///
>         (no_dmv_othernw != no_dmv_othernw[_n-1]))
(11,842 missing values generated)

. // get treated units in outcome path and race
. egen tot_treat = sum(tot_tmp), by(outcome_path race_string)

. // get control units in each outcome path by race
. by outcome_path no_dmv_match race_string no_dmv_black no_dmv_hispanic no_dmv_othernw: replace tot_tmp = count[1] if _n == 1
(1,558 real changes made)

. // get total units in each outcome path and race
. egen tot = sum(tot_tmp), by(outcome_path race_string)

. // N_voters is the number of voters who enter into the regression
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  12855

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
. *** each stratum has 10 obs
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path race_string)

. 
. 
. reghdfe voted no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 6905 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =    218,440
Absorbing 2 HDFE groups                           F(   4, 217837) =      28.83
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.6242
                                                  Adj R-squared   =     0.6232
                                                  Within R-sq.    =     0.0005
                                                  Root MSE        =     0.2010

---------------------------------------------------------------------------------
          voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
----------------+----------------------------------------------------------------
   no_dmv_match |  -.0110771   .0011647    -9.51   0.000    -.0133599   -.0087944
   no_dmv_black |    .005214   .0018469     2.82   0.005     .0015941    .0088339
no_dmv_hispanic |   .0022933   .0043567     0.53   0.599    -.0062457    .0108323
 no_dmv_othernw |   .0037923   .0039302     0.96   0.335    -.0039108    .0114955
----------------+----------------------------------------------------------------
       Absorbed |    F(598, 217837) =    601.567   0.000             (Joint test)
---------------------------------------------------------------------------------

Absorbed degrees of freedom:
------------------------------------------------------------------+
    Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
----------------+-------------------------------------------------|
             op |          595             595              0     | 
       election |            4               5              1     | 
------------------------------------------------------------------+

. local b6 = _b[no_dmv_match]

. local se6 = _se[no_dmv_match]

. local b6_2 = _b[no_dmv_black]

. local se6_2 = _se[no_dmv_black]

. local b6_3 = _b[no_dmv_hispanic]

. local se6_3 = _se[no_dmv_hispanic]

. local b6_4 = _b[no_dmv_othernw]

. local se6_4 = _se[no_dmv_othernw]

. local n6 = N 

. local nclust6 = N_voters

. 
. restore

. 
. 
. collapse (sum) count (mean) voted, ///
>         by(outcome_path_pretreat no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw race_string age_bin election)

. 
. drop if age_bin == . 
(1,755 observations deleted)

. 
. *** get N by path by summing together n treated and n control
. sort outcome_path no_dmv_match race_string age_bin no_dmv_black no_dmv_hispanic no_dmv_othernw election

. // get treated units in each bin
. gen tot_tmp = count if no_dmv_match & ///
>         ((no_dmv_match != no_dmv_match[_n-1]) | ///
>         (no_dmv_black != no_dmv_black[_n-1]) | ///
>         (no_dmv_hispanic != no_dmv_hispanic[_n-1]) | ///
>         (no_dmv_othernw != no_dmv_othernw[_n-1]) | ///
>         (age_bin != age_bin[_n-1]))
(67,256 missing values generated)

. // get treated units in outcome path and race and age bin
. egen tot_treat = sum(tot_tmp), by(outcome_path race_string age_bin)

. // get control units in each outcome path by race and age_bin
. by outcome_path no_dmv_match race_string age_bin no_dmv_black no_dmv_hispanic no_dmv_othernw: replace tot_tmp = count[1] if _n == 1
(9,964 real changes made)

. // get total units in each outcome path race and age bin
. egen tot = sum(tot_tmp), by(outcome_path race_string age_bin)

. // N_voters is the number of voters who enter into the regression
. egen N_voters = sum(tot_tmp)

. // get number of elections
. unique election
Number of unique values of election is  5
Number of records is  71615

. // N is number of voters * number of elections
. gen long N = N_voters * r(unique)

. drop tot_tmp

. gen tot_control = tot - tot_treat

. 
. *** get weights for fw based on total n per stratum
. *** each stratum has 10 obs
. *** fw requires integers so need to round
. gen tot2 = round(tot/10)

. gen tot_treat2 = round(tot_treat/10)

. egen op = group(outcome_path race_string age_bin)

. 
. reghdfe voted no_dmv_match no_dmv_black no_dmv_hispanic no_dmv_othernw [fw=tot_treat2], a(op election)
weight tot_treat2 can only contain strictly positive integers, but 53975 zero values were found (will be dropped)
(converged in 3 iterations)

HDFE Linear regression                            Number of obs   =    209,780
Absorbing 2 HDFE groups                           F(   4, 208008) =      16.49
                                                  Prob > F        =     0.0000
                                                  R-squared       =     0.6260
                                                  Adj R-squared   =     0.6228
                                                  Within R-sq.    =     0.0003
                                                  Root MSE        =     0.1977

---------------------------------------------------------------------------------
          voted |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
----------------+----------------------------------------------------------------
   no_dmv_match |  -.0082039   .0011718    -7.00   0.000    -.0105007   -.0059072
   no_dmv_black |   .0037384   .0018513     2.02   0.043       .00011    .0073669
no_dmv_hispanic |  -.0001158   .0043332    -0.03   0.979    -.0086089    .0083772
 no_dmv_othernw |   .0013721   .0039638     0.35   0.729    -.0063967     .009141
----------------+----------------------------------------------------------------
       Absorbed |   F(1767, 208008) =    195.723   0.000             (Joint test)
---------------------------------------------------------------------------------

Absorbed degrees of freedom:
------------------------------------------------------------------+
    Absorbed FE |  Num. Coefs.  =   Categories  -   Redundant     | 
----------------+-------------------------------------------------|
             op |         1764            1764              0     | 
       election |            4               5              1     | 
------------------------------------------------------------------+

. local b7 = _b[no_dmv_match]

. local se7 = _se[no_dmv_match]

. local b7_2 = _b[no_dmv_black]

. local se7_2 = _se[no_dmv_black]

. local b7_3 = _b[no_dmv_hispanic]

. local se7_3 = _se[no_dmv_hispanic]

. local b7_4 = _b[no_dmv_othernw]

. local se7_4 = _se[no_dmv_othernw]

. local n7 = N 

. local nclust7 = N_voters

. 
. log close
      name:  <unnamed>
       log:  /Users/jesse/Dropbox/voter_id/Replication/table_a7.log
  log type:  text
 closed on:  17 Aug 2020, 15:56:25
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
